pacman::p_load(tidyverse, data.table, broom, readxl, writexl, openxlsx)
rm(list=ls())
`%!in%` = Negate(`%in%`) 
################################################################################

spatial_id_a = 'county_id'
spatial_id_b = 'amc_id'
year_ar = 2018
year_br = 2017

#Import spatial units
df = fread('../../data/landuse/clean/geographicunits_argentina.csv.gz')[state!='CAPITAL FEDERAL',]
setnames(df, old = c(spatial_id_a),  new = c('spatial_id'))
df$spatial_id_estimation = df$spatial_id
df$county_id=df$spatial_id
df_a = df[, list(country, region, spatial_id_estimation, spatial_id, land_area_km2)][, lapply(.SD, sum, na.rm=TRUE), by=list(country, region, spatial_id_estimation, spatial_id)]
df_a_county = df[, list(country, region, county_id, spatial_id_estimation, spatial_id, land_area_km2)][, lapply(.SD, sum, na.rm=TRUE), by=list(country, region, county_id, spatial_id_estimation, spatial_id)]

df = fread('../../data/landuse/clean/geographicunits_brazil.csv.gz')[amc_id!='BR7061',][state!='DISTRITO FEDERAL']
setnames(df, old = c(spatial_id_b),  new = c('spatial_id'))
df$spatial_id_estimation = df$spatial_id
df_b = df[, list(country, region, spatial_id_estimation, spatial_id, land_area_km2)][, lapply(.SD, sum, na.rm=TRUE), by=list(country, region, spatial_id_estimation, spatial_id)]
df_b_county = df[, list(country, region, county_id, spatial_id_estimation, spatial_id, land_area_km2)][, lapply(.SD, sum, na.rm=TRUE), by=list(country, region, county_id, spatial_id_estimation, spatial_id)]

#Bind both countries
df_sa_units_cw = rbind(df_a_county,df_b_county)
df_sa_units = df_sa_units_cw[, list(country, region, spatial_id, land_area_km2)][, lapply(.SD, sum, na.rm=TRUE), by=list(country, region, spatial_id)]
spatial_id_list = unique(df_sa_units$spatial_id)

#Frontier crosswalks
df = df_sa_units[, list(spatial_id, region)]
df$frontier=0
frontier_list = c('NORTE','NORDESTE','NOA','NEA','PATAGONIA','CUYO')
df[region %in% frontier_list,]$frontier=1
df_frontier = df[,list(spatial_id,frontier)]

#Order spatial id by crosswalks
ctype_list = c('aboveground','belowground')
cmeasure = 'median'
df = fread("../../data/environmental/clean/carbonstocks_argentinabrazil.csv.gz")[type %in% ctype_list,]
setnames(df, old=c(cmeasure), new=c('tC_per_ha'))
df = df[,list(county_id,tC_per_ha)]
df = merge(df,df_sa_units_cw,by='county_id')
df_em = df[, list(spatial_id, tC_per_ha)][, lapply(.SD, median, na.rm=TRUE), by=list(spatial_id)]
df_sa_units = merge(df_sa_units, df_em, by='spatial_id')



################################ Supply parameters


######### Land productivity parameters

for (country in c('argentina','brazil') ){
  
  df_l = fread(paste0('../../data/landuse/clean/landuse_',country,'_county.csv.gz'))
  df_l = merge(df_l,df_sa_units_cw[,list(country, county_id, spatial_id, region, land_area_km2)], by='county_id')
  
  df_yp = fread(paste0('../../data/agronomy/clean/pastureproductivity_argentinabrazil.csv.gz'))[unit=='tonne per hectare',]
  df_yp = merge(df_yp,df_sa_units_cw[,list(county_id, spatial_id)], by='county_id')
  setnames(df_yp, old = c('int_mean','crop'),  new = c('yield','commodity'))
  
  df_yc = fread(paste0('../../data/agronomy/clean/faogaez_',country,'.csv.gz'))
  df_yc = merge(df_yc,df_sa_units_cw[,list(county_id, spatial_id)], by='county_id')
  setnames(df_yc, old = c('int_mean','crop'),  new = c('yield','commodity'))
  
  crop_list = c('soybean','maize') 
  commodity_list = c('soybean','maize','pasture')
  
  #Unobserved productivity shocks
  df = fread(paste0("inputs/parameters_supply_across_zetat_level_curated.csv.gz"))
  df$zetat = df$zetat_rescaled
  df_zetat = df[commodity %in% commodity_list,][,c('spatial_id', 'commodity', 'zetat')]
  
  if (country=='argentina'){
 
    #Define total surface
    df = df_l[, c('land_area','farmland') := list(land_area_km2*100, crops_permanent + crops_seasonal2 + crops_unaccounted + pasture + forage_seasonal + forage_permanent + forest_natural + forest_planted + not_used + not_usable + trails_and_parks + unaccounted)]
    df_land_a = df[year==year_ar,][, c('land_ha') := list(farmland)][,  list(country, region, spatial_id, land_ha)][, lapply(.SD, sum, na.rm=TRUE), by=list(country, region, spatial_id)]

    #FAO-GAEZ/Calibrated productivity
    df = df_yp[country=='ARGENTINA' & unit=='tonne per hectare', list(spatial_id, commodity, yield)][, lapply(.SD, mean, na.rm=TRUE), by=list(spatial_id, commodity)]
    df = merge(df, df_zetat, by=c('spatial_id','commodity'))
    df_yp = df[,c('spatial_id','commodity','yield','zetat')]
    
    df = df_yc[commodity %in% crop_list, list(spatial_id, commodity, yield)][, lapply(.SD, mean, na.rm=TRUE), by=list(spatial_id, commodity)]
    df = merge(df, df_zetat, by=c('spatial_id','commodity'))
    df = df[,commodity:='crops'][, lapply(.SD, mean, na.rm=TRUE), by=list(spatial_id,commodity)]
    df_yc = df[,c('spatial_id','commodity','yield','zetat')]
    
    #Long to wide
    df = rbind(df_yp,df_yc)[,c('spatial_id','commodity','yield')]
    df_yield_a = dcast(df, spatial_id ~ commodity, value.var = c("yield"))
    df = rbind(df_yp,df_yc)[,c('spatial_id','commodity','zetat')]
    df_zetat_a = dcast(df, spatial_id ~ commodity, value.var = c("zetat"))
    
    #Merge
    df_a = merge(df_land_a, df_yield_a, by=c('spatial_id'), all.x=T)
    df_a = merge(df_a, df_zetat_a, by=c('spatial_id'), suffixes = c("","_zetat"), all.x=T)
    
  } else {
    
    #Define total surface
    df = df_l[, c('land_area','farmland') := list(land_area_km2*100, crops_permanent + crops_seasonal2 + pasture_natural + pasture_planted + forest_natural + forest_planted + other)]
    df_land_b = df[year==year_br,][, c('land_ha') := list(farmland)][,  list(country, region, spatial_id, land_ha)][, lapply(.SD, sum, na.rm=TRUE), by=list(country, region, spatial_id)]

    #FAO-GAEZ/Calibrated productivity
    df = df_yp[country=='BRAZIL' & unit=='tonne per hectare',][, list(spatial_id, commodity, yield)][, lapply(.SD, mean, na.rm=TRUE), by=list(spatial_id, commodity)]
    df = merge(df, df_zetat, by=c('spatial_id','commodity'))
    df_yp = df
    
    df = df_yc[commodity %in% crop_list, list(spatial_id, commodity, yield)][, lapply(.SD, mean, na.rm=TRUE), by=list(spatial_id, commodity)]
    df = merge(df, df_zetat, by=c('spatial_id','commodity'))
    df = df[,commodity:='crops'][, lapply(.SD, mean, na.rm=TRUE), by=list(spatial_id,commodity)]
    df_yc = df[,c('spatial_id','commodity','yield','zetat')]
    
    #Long to wide
    df = rbind(df_yp,df_yc)[,c('spatial_id','commodity','yield')]
    df_yield_b = dcast(df, spatial_id ~ commodity, value.var = c("yield"))
    df = rbind(df_yp,df_yc)[,c('spatial_id','commodity','zetat')]
    df_zetat_b = dcast(df, spatial_id ~ commodity, value.var = c("zetat"))
    
    #Merge
    df_b = merge(df_land_b, df_yield_b, by=c('spatial_id'), all.x=T)
    df_b = merge(df_b, df_zetat_b, by=c('spatial_id'), suffixes = c("","_zetat"), all.x=T)
  }
}

df = rbind(df_a,df_b)
df1=df[,c('country','region','spatial_id','land_ha')][, lapply(.SD, sum, na.rm=TRUE), by=list(country,region,spatial_id)]
df2=df[,c('country','region','spatial_id','crops','pasture','crops_zetat','pasture_zetat')][, lapply(.SD, median, na.rm=TRUE), by=list(country,region,spatial_id)]
uq=0.975
lq=0.025

#Crops
max_var = quantile(df2[crops>0,]$crops,uq)
min_var = quantile(df2[crops>0,]$crops,lq)
df2[crops>max_var,]$crops = max_var
df2[crops<min_var,]$crops = min_var
max_var = quantile(df2[crops_zetat>0,]$crops_zetat,uq)
min_var =  quantile(df2[crops_zetat>0,]$crops_zetat,lq)
df2[crops_zetat>max_var,]$crops_zetat = max_var
df2[crops_zetat<min_var,]$crops_zetat = min_var

#Pasture
max_var =  quantile(df2[pasture>0,]$pasture,uq)
min_var =  quantile(df2[pasture>0,]$pasture,lq)
df2[pasture>max_var,]$pasture = max_var
df2[pasture<min_var,]$pasture = min_var
max_var = quantile(df2[pasture_zetat>0,]$pasture_zetat,uq)
min_var = quantile(df2[pasture_zetat>0,]$pasture_zetat,lq)
df2[pasture_zetat>max_var,]$pasture_zetat = max_var
df2[pasture_zetat<min_var,]$pasture_zetat = min_var
df_land_productivity = merge(df1,df2, by=c('country','region','spatial_id'))

#Natural payoff
df = fread(paste0('inputs/parameters_supply_AN_curated.csv.gz'))
df_AN = df[,c('spatial_id','AN')]

#Elasticities
df = fread(paste0('inputs/parameters_supply.csv'))
df_luc_elasticities = df[, list(theta, lambda, frontier)][, lapply(.SD, median, na.rm=TRUE), by=list(frontier)]

#Merge all land parameters
df_land = merge(df_land_productivity, df_AN, by=c('spatial_id'))
df_land = merge(df_land, df_frontier, by=c('spatial_id'))
df_land = merge(df_land, df_luc_elasticities, by=c('frontier'))


######### Non-land parameters

#Labor endowment
df = fread('../../data/factors/clean/employment.csv.gz')
setnames(df, old=c(spatial_id_b), new=c('spatial_id'))
df_a = df[country=='ARGENTINA',c('country','region','spatial_id','employment_all','employment_agriculture')][, lapply(.SD, sum, na.rm=TRUE), by=list(country,region,spatial_id)]
df_b = df[country=='BRAZIL',c('country','region','spatial_id','employment_all','employment_agriculture')][, lapply(.SD, sum, na.rm=TRUE), by=list(country,region,spatial_id)]
df_L = rbind(df_a,df_b)
df_L[employment_agriculture>employment_all,]$employment_agriculture=df_L[employment_agriculture>employment_all,]$employment_all

#Wages in outside option
df = fread('../../data/factors/clean/wages_nonag.csv.gz')
setnames(df, old=c(spatial_id_b, 'wage_nonag'), new=c('spatial_id','wH_outside'))
df_a = df[country=='ARGENTINA',c('country','region','spatial_id','wH_outside')][, lapply(.SD, mean, na.rm=TRUE), by=list(country,region,spatial_id)]
df_b = df[country=='BRAZIL',c('country','region','spatial_id','wH_outside')][, lapply(.SD, mean, na.rm=TRUE), by=list(country,region,spatial_id)]
df = rbind(df_a,df_b)
price_normalization=1/75
df$wH_outside=df$wH_outside*price_normalization
df_wH_outside=df

#Intermediate input world prices
df = fread('../../data/factors/clean/fertilizer_prices.csv')
df = df[year==year_ar | year==year_br,]
wM_international = mean(df$price_USD_per_t)*price_normalization

#Domestic trade costs measure
df = fread('../../data/agribusiness/clean/distance_to_hub_all.csv.gz')
df = df[ma_type=='all_hubs_weighted']
df_0  = df[,list(country,county_id,ma)][, lapply(.SD, median, na.rm=TRUE), by=list(country,county_id)][!is.na(ma),]

df = fread(paste0('../../data/landuse/clean/geographicunits_argentina.csv.gz'))[, list(country, region, state, state_id, county_id)]
df = df[, list(country, region, state_id, county_id)]
df$spatial_id=df$county_id
df$mesoregion_id=df$county_id
df$microregion_id=df$county_id
df1 = df[, list(country, region, state_id,  mesoregion_id, microregion_id, spatial_id,county_id)]
df = fread(paste0('../../data/landuse/clean/geographicunits_brazil.csv.gz'))[, list(country, region, state, state_id, mesoregion_id, microregion_id, amc_id, county_id)]
df$spatial_id=df$amc_id
df2 = df[, list(country, region, state_id,  mesoregion_id, microregion_id, spatial_id, county_id)]
df3=rbind(df1,df2)

df = merge(df_0[,list(county_id,ma)],df_sa_units_cw[,list(county_id,spatial_id)],by=c('county_id'), all.y=T)
df = merge(df,df3[,list(country,county_id, microregion_id,mesoregion_id,state_id,region)], by=c('county_id'), all.y=T)
df = merge(df,df_frontier,by='spatial_id')

df_f = df[,c('frontier','ma')][, lapply(.SD, median, na.rm=TRUE), by=list(frontier)][,c('ma_f'):=list(ma)]
df_c = df[,c('country','ma')][, lapply(.SD, median, na.rm=TRUE), by=list(country)][,c('ma_c'):=list(ma)]
df_r = df[,c('region','ma')][, lapply(.SD, median, na.rm=TRUE), by=list(region)][,c('ma_r'):=list(ma)]
df_s = df[,c('state_id','ma')][, lapply(.SD, median, na.rm=TRUE), by=list(state_id)][,c('ma_s'):=list(ma)]
df_me = df[,c('mesoregion_id','ma')][, lapply(.SD, median, na.rm=TRUE), by=list(mesoregion_id)][,c('ma_me'):=list(ma)]
df_mi = df[,c('microregion_id','ma')][, lapply(.SD, median, na.rm=TRUE), by=list(microregion_id)][,c('ma_mi'):=list(ma)]

df  = df[,list(frontier,country,region,state_id,mesoregion_id,microregion_id,spatial_id,ma)][, lapply(.SD, median, na.rm=TRUE), by=list(frontier,country,region,state_id,mesoregion_id,microregion_id,spatial_id)]
df = merge(df, df_mi[,c('microregion_id','ma_mi')], by='microregion_id',  all.x=T)
df = merge(df, df_me[,c('mesoregion_id','ma_me')], by='mesoregion_id',  all.x=T)
df = merge(df, df_s[,c('state_id','ma_s')], by='state_id',  all.x=T)
df = merge(df, df_r[,c('region','ma_r')], by='region',  all.x=T)
df = merge(df, df_c[,c('country','ma_c')], by='country',  all.x=T)
df = merge(df, df_f[,c('frontier','ma_f')], by='frontier',  all.x=T)
df[is.na(ma)]$ma  = df[is.na(ma)]$ma_mi 
df[is.na(ma)]$ma  = df[is.na(ma)]$ma_me 
df[is.na(ma)]$ma  = df[is.na(ma)]$ma_s
df[is.na(ma)]$ma  = df[is.na(ma)]$ma_r 
df[is.na(ma)]$ma  = df[is.na(ma)]$ma_c 
df[is.na(ma)]$ma  = df[is.na(ma)]$ma_f 

df$tc = log(1/df$ma)
df$tc = df$tc/min(df$tc)
df$wM = wM_international*df$tc
df_wM = df[,list(spatial_id,wM)][, lapply(.SD, median, na.rm=TRUE), by=list(spatial_id)]

#Factor shares
df_gamma = setDT(read_excel('../../data/factors/raw/factor_shares.xlsx'))

#Merge all
df=merge(df_L,df_wH_outside, by=c('country','region','spatial_id'))
df=merge(df,df_wM, by=c('spatial_id'))

df$pasture_gamma_L = df_gamma[commodity=='pasture',]$land_share
df$pasture_gamma_H = df_gamma[commodity=='pasture',]$labor_share
df$pasture_gamma_M = 1-df$pasture_gamma_L-df$pasture_gamma_H

df$crops_gamma_L = 0.5*(df_gamma[commodity=='soybean',]$land_share + df_gamma[commodity=='maize',]$land_share)
df$crops_gamma_H = 0.5*(df_gamma[commodity=='soybean',]$labor_share + df_gamma[commodity=='maize',]$labor_share)
df$crops_gamma_M = 1-df$crops_gamma_L-df$crops_gamma_H

#Across-sector labor supply elasticity
df$psi = 2

#Across-location labor supply elasticity
df$varphi = 2

#All non-land factor parameters
df_nonland = df


######### Merge land and non-land parameters

df_supply = merge(df_land,df_nonland, by=c('country','region','spatial_id'))
df_supply = merge(df_supply, df_sa_units[,c('spatial_id','tC_per_ha')], by=c('spatial_id'), all.x=T)

df_supply[land_ha==0,]$land_ha = min(df_supply[land_ha>0,]$land_ha)
df_supply[employment_all==0,]$employment_all = min(df_supply[employment_all>0,]$employment_all)
df_supply[employment_agriculture==0,]$employment_agriculture = min(df_supply[employment_agriculture>0,]$employment_agriculture)

scale_AN = 1/30
scale_pasture = 5
df_supply$AN = df_supply$AN*scale_AN
df_supply$pasture = df_supply$pasture*scale_pasture

#Order by carbon intensity and frontier identifier
setorder(df_supply, tC_per_ha)
setorder(df_supply, frontier)
df_supply$spatial_id_name = df_supply$spatial_id
df_supply = df_supply[spatial_id %in% spatial_id_list, 
                      list(spatial_id_name, theta, lambda, land_ha, employment_all, employment_agriculture, 
                           psi, wH_outside, wM,
                           AN,
                           pasture, crops, 
                           pasture_zetat, crops_zetat, 
                           pasture_gamma_L, crops_gamma_L,
                           pasture_gamma_H, crops_gamma_H, 
                           pasture_gamma_M, crops_gamma_M,
                           varphi)]
spatial_id_list_final = unique(df_supply$spatial_id)



################################ Trade costs

df_u =  unique(df_sa_units_cw[,list(spatial_id_estimation,spatial_id)])

#Spatial aggregation
destination_unit = 'destination_bloc'
df = fread(paste0('inputs/parameters_tradecosts_',destination_unit,'.csv.gz'))
setnames(df, old=c('tc_ic','spatial_id'), new=c('tc','spatial_id_estimation') )
df = merge(df[,list(spatial_id_estimation, destination_unit, tc)], df_u, by='spatial_id_estimation')

df$destination_bloc=''
df[destination_unit %in% c("SOUTH AMERICA"),]$destination_bloc = 'SA'
df[destination_unit %in% c("EUROPE"),]$destination_bloc = 'EU'
df[destination_unit %in% c("ASIA"),]$destination_bloc = 'AS'
df[destination_unit %in% c("AFRICA","MIDDLE EAST","CENTRAL AMERICA","NORTH AMERICA","OCEANIA"),]$destination_bloc = 'ROW'
df = df[, list(spatial_id, destination_bloc, tc)][, lapply(.SD, mean, na.rm=TRUE), by=list(spatial_id,destination_bloc)]
df = dcast(df, spatial_id ~ destination_bloc, value.var = "tc")[, list(spatial_id, SA, EU, AS, ROW)]

#Add spatial unit names
df = merge(df, df_frontier, by ='spatial_id')
df = merge(df, df_sa_units[,c('spatial_id','tC_per_ha')], by=c('spatial_id'), all.x=T)
setorder(df, tC_per_ha)
setorder(df, frontier)
df$spatial_id_name=df$spatial_id
df_tc = df[spatial_id %in% spatial_id_list_final, list(spatial_id_name, SA, EU, AS, ROW)]
spatial_id_list_final = unique(df_tc$spatial_id)



################################ Nation indicators

df = unique(df_sa_units[, list(spatial_id, country)])
df$source_a = 1*(df$country=='ARGENTINA')
df$source_b = 1*(df$country=='BRAZIL')
df = merge(df, df_frontier, by ='spatial_id')
df = merge(df, df_sa_units[,c('spatial_id','tC_per_ha')], by=c('spatial_id'), all.x=T)
setorder(df, tC_per_ha)
setorder(df, frontier)
df$spatial_id_name=df$spatial_id
df_in = df[spatial_id %in% spatial_id_list_final, list(spatial_id_name, source_a, source_b)]
spatial_id_list_final = unique(df_in$spatial_id)



################################ Demand parameters

df = fread('inputs/parameters_demand.csv')
df$destination_bloc=''
df[destination_continent %in% c("SOUTH AMERICA"),]$destination_bloc = 'SA'
df[destination_continent %in% c("EUROPE"),]$destination_bloc = 'EU'
df[destination_continent %in% c("ASIA"),]$destination_bloc = 'AS'
df[destination_continent %in% c("AFRICA","MIDDLE EAST","NORTH AMERICA","OCEANIA","CENTRAL AMERICA"),]$destination_bloc = 'ROW'
df$product = ''
df[commodity %in% crop_list,]$product = 'crops'
df[commodity %in% c("beef"),]$product = 'beef'
product_list = c('beef','crops')
df=df[product %in% product_list,]
col_list = c('year', 'origin_id', 'origin_nation', 'destination_id' , 'destination_continent', 'destination_bloc', 'product',
             'eta_l', 'eta_m', 'eta_u', 'X_jt', 'a_ijtc', 'a_njtc', 'a_jtc')
col_list_agg = c('year', 'origin_id', 'origin_nation', 'destination_id' , 'destination_continent', 'destination_bloc', 'product')
df=df[,..col_list][, lapply(.SD, median, na.rm=TRUE), by=col_list_agg]
df_main=df


####### Taste shifters ijc
df_u =  unique(df_sa_units_cw[,list(spatial_id_estimation,spatial_id)])
df = df_main[, list(origin_id, product, destination_bloc, a_ijtc)][, lapply(.SD, median, na.rm=TRUE), by=list(origin_id, product, destination_bloc)]
df = merge(df, df_u, by.x='origin_id', by.y = 'spatial_id_estimation')
max_var=quantile(df$a_ijtc,0.75)
min_var=quantile(df$a_ijtc,0.25)
df[a_ijtc>max_var,]$a_ijtc = max_var
df[a_ijtc<min_var,]$a_ijtc = min_var
df = df[,c('spatial_id','product','destination_bloc','a_ijtc')][, lapply(.SD, median, na.rm=TRUE), by=list(spatial_id, product, destination_bloc)]
df = dcast(df, spatial_id + product ~ destination_bloc, value.var = "a_ijtc")
df = df[, list(spatial_id, product, SA, EU, AS, ROW)]

df_aux1 = df_sa_units[,c('spatial_id')]
df_aux1$product = 'crops' 
df_aux2 = df_sa_units[,c('spatial_id')]
df_aux2$product = 'beef'
df_aux=rbind(df_aux1,df_aux2)
df = merge(df, df_aux, by=c('spatial_id','product'), all.y=T)

df_aux = df[, list(product, SA, EU, AS, ROW)][, lapply(.SD, median, na.rm=TRUE), by=list(product)]
df = merge(df, df_aux, by='product', suffixes=c('','.aux') )
df[is.na(SA),]$SA = df[is.na(SA),]$SA.aux
df[is.na(EU),]$EU = df[is.na(EU),]$EU.aux
df[is.na(AS),]$AS = df[is.na(AS),]$AS.aux
df[is.na(ROW),]$ROW = df[is.na(ROW),]$ROW.aux
df = df[, list(spatial_id, product, SA, EU, AS, ROW)]

df_aux = df[!is.na(SA),]
df_aux = df_aux[, ratioSA := SA/(EU+AS+ROW)]
df_aux = df_aux[, list(spatial_id,ratioSA)]
df = merge(df, df_aux, by=c('spatial_id'), all.x=T)
df[is.na(ratioSA),]$ratioSA = median(df$ratioSA, na.rm=T)
df[is.na(SA),]$SA = (df[is.na(SA),]$EU+df[is.na(SA),]$AS+df[is.na(SA),]$ROW)*df[is.na(SA),]$ratioSA
df = df[,c('spatial_id','product','SA','EU','AS','ROW')]

df = merge(df, df_frontier, by ='spatial_id')
df = merge(df, df_sa_units[,c('spatial_id','tC_per_ha')], by=c('spatial_id'), all.x=T)
setorder(df, tC_per_ha)
setorder(df, frontier)
setorder(df, product)
df$spatial_id_name=df$spatial_id
df_aijc = df[spatial_id %in% spatial_id_list_final, list(spatial_id_name, product, SA, EU, AS, ROW)]
spatial_id_list_final = unique(df_aijc$spatial_id)


####### Taste shifters njc
df = df_main[, list(origin_nation, product, destination_bloc, a_njtc)][, lapply(.SD, median, na.rm=TRUE), by=list(origin_nation, product, destination_bloc)]
max_var=quantile(df$a_njtc,0.75)
min_var=quantile(df$a_njtc,0.25)
df[a_njtc>max_var,]$a_njtc = max_var
df[a_njtc<min_var,]$a_njtc = min_var
df = dcast(df, origin_nation + product ~ destination_bloc, value.var = "a_njtc")
df=df[, list(origin_nation, product, SA, EU, AS, ROW)]

df_aux = df[!is.na(SA),]
df_aux = df_aux[, ratioSA := SA/(EU+AS+ROW)]
df_aux = df_aux[, list(origin_nation,ratioSA)]
df = merge(df, df_aux, by=c('origin_nation'), all.x=T)
df[is.na(ratioSA),]$ratioSA = median(df$ratioSA, na.rm=T)
df[is.na(SA),]$SA = (df[is.na(SA),]$EU+df[is.na(SA),]$AS+df[is.na(SA),]$ROW)*df[is.na(SA),]$ratioSA
setorder(df, product)
df_anjc=df[,list(origin_nation, product, SA, EU, AS, ROW)]


####### Taste shifters jc
df = df_main[, list(product, destination_bloc, a_jtc)][, lapply(.SD, median, na.rm=TRUE), by=list(product, destination_bloc)]
max_var =quantile(df$a_jtc,0.5)
min_var =quantile(df$a_jtc,0.25)
df[a_jtc>max_var,]$a_jtc = max_var
df[a_jtc<min_var,]$a_jtc = min_var
df = dcast(df, product ~ destination_bloc, value.var = "a_jtc")
df=df[, list(product, SA, EU, AS, ROW)]

df_aux = df[!is.na(SA),]
df_aux = df_aux[, ratioSA := SA/(EU+AS+ROW)]
df_aux = df_aux[, list(ratioSA)]
df$ratioSA = df_aux$ratioSA
df[is.na(ratioSA),]$ratioSA = median(df$ratioSA, na.rm=T)
df[is.na(SA),]$SA = (df[is.na(SA),]$EU+df[is.na(SA),]$AS+df[is.na(SA),]$ROW)*df[is.na(SA),]$ratioSA
setorder(df, product)
df_ajc=df[,list(product, SA, EU, AS, ROW)]


#### Demand parameters and income
df_x = df_main[, list(destination_bloc, eta_l, eta_m, eta_u)][, lapply(.SD, median, na.rm=TRUE), by=list(destination_bloc)]

#Import expenditure of trading partners
df = df_main[, list(destination_bloc,destination_id, product, X_jt)][, lapply(.SD, median, na.rm=TRUE), by=list(destination_bloc,destination_id,product)]
df_y = df[, list(destination_bloc, X_jt)][, lapply(.SD, sum, na.rm=TRUE), by=list(destination_bloc)]

#Scale SA expenditure to target domestic market share
domestic_share=0.5
export_value = sum(df_y$X_jt)
df_y[destination_bloc=='SA']$X_jt = export_value*(domestic_share/(1-domestic_share))
df=merge(df_x,df_y,by=c('destination_bloc'))
df=df[,list(destination_bloc, eta_l, eta_m, eta_u,X_jt)]
df=dcast(melt(df, id.vars = "destination_bloc"), variable ~ destination_bloc)
df=df[, list(variable, SA, EU, AS, ROW)]
df$order = c(2,3,4,1)
setorder(df, order)
df[,order:=NULL]
df_demand=df



################################ Emissions

stage_list= c('Feed','Farm')
product_list = c('Beef (beef herd)','Soybean Oil','Maize (Meal)')
commodity_list = c('beef','soybean','maize')

#Non-LUC emissions
df = fread('../../data/environmental/raw/GHG-emissions-by-life-cycle-stage-OurWorldinData-upload.csv')
setnames(df, old=c('Food product','Land use change','Animal Feed'), new=c('product','LUC','Feed'))
df = df[product %in% c('Beef (beef herd)','Maize (Meal)','Soybean Oil','Tofu'), list(product, LUC, Feed, Farm)]
df <- gather(df, stage, emissions_per_kg, LUC:Farm, factor_key=TRUE)
df = setDT(df)[product %in% product_list & stage %in% stage_list, list(product, emissions_per_kg)][, lapply(.SD, sum, na.rm=TRUE), by=product]
df = dcast(melt(df, id.vars = "product"), variable ~ product)
setnames(df, old=product_list, new=commodity_list)
df$crops = (df$soybean + df$maize)/2
df_nluc_data = df[,list(variable,beef,crops)]

#LUC emissions
df_sa = fread("../../data/environmental/clean/carbonstocks_argentinabrazil.csv.gz")[type %in% ctype_list,]
df_a = fread(paste0('../../data/landuse/clean/geographicunits_argentina.csv.gz'))[, land_area_ha:=land_area_km2*100][, list(county_id,land_area_ha)]
df_b = fread(paste0('../../data/landuse/clean/geographicunits_brazil.csv.gz'))[, land_area_ha:=land_area_km2*100][, list(county_id,land_area_ha)]
df = rbind(df_a,df_b)
df_sa = merge(df, df_sa, by='county_id')
df = merge(df_sa, df_sa_units_cw[,list(county_id,spatial_id)], by='county_id')
setnames(df, old=c(cmeasure), new=c('tC_per_ha'))
df_luc_data = df[, list(spatial_id,tC_per_ha)][, lapply(.SD, median, na.rm=TRUE), by=list(spatial_id)]

#Yields (t/ha)
df = df_land[, list(spatial_id, pasture, crops)]

#Non-LUC emissions (flows)
df$beef_nluc_per_t = df_nluc_data$beef
df$crops_nluc_per_t = df_nluc_data$crops
df$beef_nluc_per_ha = df$beef_nluc_per_t*df$pasture
df$crops_nluc_per_ha = df$crops_nluc_per_t*df$crops
df_nluc = df[, list(spatial_id,beef_nluc_per_ha,crops_nluc_per_ha,beef_nluc_per_t,crops_nluc_per_t,pasture,crops)][, lapply(.SD, median, na.rm=TRUE), by=list(spatial_id)]

#LUC emissions (flows + stocks)
C_CO2_multiplier = 3.67 
production_years = 20 
df=df_luc_data
df$tCO2_per_ha_stock = df$tC_per_ha*C_CO2_multiplier
df$tCO2_per_ha_flow = df$tCO2_per_ha_stock/production_years
df_luc = df[,list(spatial_id,tCO2_per_ha_stock,tCO2_per_ha_flow)]

#Combined
df = merge(df_nluc, df_luc, by='spatial_id')

annual_deforestation_mha=1.82 
total_land_pasture_mha=238 
total_land_crops_mha=100 
annual_pasture_expansion_mha = 2.57 
annual_cropland_expansion_mha = 1.6
beef_share=0.8
deforestation_rate_beef = (beef_share*annual_deforestation_mha)/annual_pasture_expansion_mha
deforestation_rate_crops = ((1-beef_share)*annual_deforestation_mha)/annual_cropland_expansion_mha

df$beef_luc_flow_per_ha = df$tCO2_per_ha_flow*deforestation_rate_beef
df$crops_luc_flow_per_ha = df$tCO2_per_ha_flow*deforestation_rate_crops
df$beef_total_flow_per_ha = df$beef_nluc_per_ha + df$beef_luc_flow_per_ha 
df$crops_total_flow_per_ha = df$crops_nluc_per_ha + df$crops_luc_flow_per_ha 
df$beef_total_flow_per_t = df$beef_total_flow_per_ha/df$pasture
df$crops_total_flow_per_t = df$crops_total_flow_per_ha/df$crops

df = merge(df, df_frontier, by='spatial_id')
df = merge(df, df_sa_units[,c('spatial_id','tC_per_ha')], by=c('spatial_id'), all.x=T)
setorder(df, tC_per_ha)
setorder(df, frontier)
df$spatial_id_name = df$spatial_id

df_e = df[spatial_id %in% spatial_id_list_final, list(spatial_id_name,tCO2_per_ha_stock,tCO2_per_ha_flow)]
spatial_id_list_final = unique(df_e$spatial_id)

df_en = df[spatial_id %in% spatial_id_list_final, list(spatial_id_name,beef_nluc_per_t,crops_nluc_per_t)]
spatial_id_list_final = unique(df_en$spatial_id)

df_et = df[spatial_id %in% spatial_id_list_final, list(spatial_id_name, beef_total_flow_per_t,crops_total_flow_per_t)]
spatial_id_list_final = unique(df_et$spatial_id)

df = merge(df_et, df_sa_units[,list(spatial_id, region)], by.x='spatial_id_name',by.y='spatial_id', sort=F)
df_r = df[,-c('spatial_id_name')][, lapply(.SD, mean, na.rm=TRUE), by=list(region)]
df = merge(df[,list(spatial_id_name,region)],df_r, by='region',sort=F)
df$spatial_id = df$spatial_id_name
df_et_r = df[spatial_id %in% spatial_id_list_final, list(spatial_id_name, beef_total_flow_per_t,crops_total_flow_per_t)]
spatial_id_list_final = unique(df_et_r$spatial_id)



################################ Intermediaries


################ N

product_type_trase_a = c('SOYBEAN CAKE', 'SOYBEAN OIL', 'SOYBEANS')
product_type_trase_b = c('Soy bean equivalents',
                         'Corn equivalents',
                         'BEEF BONELESS','BEEF','BEEF OFFALS','MEAT PREPARATIONS','BEEF DRIED SALTED SMOKED','CATTLE')
id_type_trase = c("county_known")

df = fread("../../data/agribusiness/clean/soybean_argentina.csv.gz")[product_type %in% product_type_trase_a,][id_type==id_type_trase,]
setnames(df, old=c(spatial_id_a), new=c('spatial_id'))
df = df[, list(year, spatial_id, commodity, exporter_group)][ , .(N_i = length(unique(exporter_group))), by=list(year,spatial_id,commodity)]
df = df[, list(spatial_id, commodity, N_i)][, lapply(.SD, mean, na.rm=TRUE), by=list(spatial_id,commodity)]
df_n_a = df[,list(spatial_id,N_i)]

df = fread("../../data/agribusiness/clean/beef_brazil.csv.gz")[product_type %in% product_type_trase_b,][id_type==id_type_trase,]
setnames(df, old=c(spatial_id_b), new=c('spatial_id'))
df = df[, list(year, spatial_id, commodity, exporter_group)][ , .(N_i = length(unique(exporter_group))), by=list(year,spatial_id,commodity)]
df = df[, list(spatial_id, commodity, N_i)][, lapply(.SD, mean, na.rm=TRUE), by=list(spatial_id,commodity)]
df_n_bb = df[,list(spatial_id,N_i)]

df = fread("../../data/agribusiness/clean/soybean_brazil.csv.gz")[product_type %in% product_type_trase_b,][id_type==id_type_trase,]
setnames(df, old=c(spatial_id_b), new=c('spatial_id'))
df = df[, list(year, spatial_id, commodity, exporter_group)][ , .(N_i = length(unique(exporter_group))), by=list(year,spatial_id,commodity)]
df = df[, list(spatial_id, commodity, N_i)][, lapply(.SD, mean, na.rm=TRUE), by=list(spatial_id,commodity)]
df_n_bs = df[,list(spatial_id,N_i)]

df = fread("../../data/agribusiness/clean/maize_brazil.csv.gz")[product_type %in% product_type_trase_b,][id_type==id_type_trase,]
setnames(df, old=c(spatial_id_b), new=c('spatial_id'))
df = df[, list(year, spatial_id, commodity, exporter_group)][ , .(N_i = length(unique(exporter_group))), by=list(year,spatial_id,commodity)]
df = df[, list(spatial_id, commodity, N_i)][, lapply(.SD, mean, na.rm=TRUE), by=list(spatial_id,commodity)]
df_n_bm = df[,list(spatial_id,N_i)]

#Brazil
df=merge(df_n_bb,df_n_bm,by='spatial_id')
df=merge(df,df_n_bs,by='spatial_id')
df$N_i_s = df$N_i
df$N_i_b = df$N_i.x
df$N_i_m = df$N_i.y
df$N_i = (df$N_i_b+df$N_i_s+df$N_i_m)/3
df$N_i_c = (df$N_i_s+df$N_i_m)/2
ratio_bs = mean(df$N_i_b/df$N_i_s)
ratio_ms = mean(df$N_i_m/df$N_i_s)
ratio_ar_br = mean(df_n_a$N_i)/mean(df$N_i_s)
df_n_b = df[,list(spatial_id,N_i,N_i_b,N_i_c)]

#Argentina
df=df_n_a
df$N_i_s = df$N_i/ratio_ar_br
df$N_i_b = df$N_i_s*ratio_bs
df$N_i_m = df$N_i_s*ratio_ms
df$N_i = (df$N_i_b+df$N_i_s+df$N_i_m)/3
df$N_i_c = (df$N_i_s+df$N_i_m)/2
df_n_a = df[,list(spatial_id,N_i,N_i_b,N_i_c)]

#Combined
df = rbind(df_n_a,df_n_b)
df = merge(df, df_sa_units[,list(spatial_id,tC_per_ha)], by=c('spatial_id'), all.y=T)
df = merge(df, df_frontier, by=c('spatial_id'))

df[is.na(N_i),]$N_i = quantile(df$N_i,0.05,na.rm=T)
df[is.na(N_i_b),]$N_i_b = quantile(df$N_i_b,0.05,na.rm=T)
df[is.na(N_i_c),]$N_i_c = quantile(df$N_i,0.05,na.rm=T)
df$N_i = round(df$N_i,0)
df$N_i_b = round(df$N_i_b,0)
df$N_i_c = round(df$N_i_c,0)
df[N_i==0,]$N_i = 1
df[N_i_b==0,]$N_i_b = 1
df[N_i_c==0,]$N_i_c = 1

setorder(df, tC_per_ha)
setorder(df, frontier)
df$N_beef = df$N_i_b 
df$N_crops = df$N_i_c
df$spatial_id_name = df$spatial_id
df_n = df[spatial_id %in% spatial_id_list_final, list(spatial_id_name,N_beef,N_crops)]
spatial_id_list_final = unique(df_n$spatial_id)



################ MP

product_type_trase_mp_a = c('SOYBEANS')
product_type_trase_mp_b = c('Soy bean equivalents','Corn equivalents','BEEF BONELESS','BEEF')

df = fread(paste0("inputs/parameters_marginalproduct_destination_bloc.csv.gz"))
setnames(df, old=c('mp_ic'), new=c('mp'))
df = df[product_type %in% product_type_trase_mp_a | product_type %in% product_type_trase_mp_b ,]
df = df[, list(spatial_id, commodity, mp)][, lapply(.SD, median, na.rm=TRUE), by=list(spatial_id,commodity)]
df = dcast(df, spatial_id ~ commodity, value.var = "mp")
setnames(df, new = c('spatial_id','mp_beef','mp_soybean','mp_maize'))

df = merge(df, df_sa_units[,list(spatial_id,tC_per_ha)], by=c('spatial_id'), all.y=T)
df = merge(df, df_frontier, by=c('spatial_id'), all.x=T)

df[is.na(mp_beef),]$mp_beef = quantile(df$mp_beef,0.5,na.rm=T)
df[is.na(mp_soybean),]$mp_soybean = quantile(df$mp_soybean,0.5,na.rm=T)
df[is.na(mp_maize),]$mp_maize = quantile(df$mp_maize,0.5,na.rm=T)
df$mp_crops = (df$mp_soybean+df$mp_maize)/2

setorder(df, tC_per_ha)
setorder(df, frontier)
df$spatial_id_name = df$spatial_id 
df_mp = df[spatial_id %in% spatial_id_list_final, list(spatial_id_name,mp_beef,mp_crops)]
spatial_id_list_final = unique(df_mp$spatial_id)


################################ Weights

#Beef
for(country in c('argentina','brazil')){
  
  if(country=='argentina'){
    df = fread(paste0("../../data/production/clean/cattlestock_",country,"_county_census.csv.gz"))[year==year_ar,]
  }else{
    df = fread(paste0("../../data/production/clean/cattlestock_",country,"_county_census.csv.gz"))[year==year_br,]
  }
  
  df = merge(df[,list(county_id,total_cattlestock)], df_sa_units_cw[,list(county_id,spatial_id)], by='county_id')
  df = df[, list(spatial_id,total_cattlestock)][, lapply(.SD, sum, na.rm=TRUE), by=list(spatial_id)]
  
  if(country=='argentina'){df_w_a = df}else{df_w_b = df}
  
}

df = rbind(df_w_a,df_w_b)
df = merge(df_sa_units[spatial_id %in% spatial_id_list_final, list(spatial_id)], df, by=c('spatial_id'), all.x=T)
spatial_id_list_final = unique(df$spatial_id)
df[is.na(total_cattlestock),]$total_cattlestock = round(quantile(df$total_cattlestock,0.1,na.rm=T),0)
df_w_beef = df

#Crops
for(country in c('argentina','brazil')){
  
  if(country=='argentina'){
    df = fread(paste0("../../data/production/clean/production_",country,".csv.gz"))[year==year_ar & commodity %in% crop_list,]
  }else{
    df = fread(paste0("../../data/production/clean/production_",country,".csv.gz"))[year==year_br & commodity %in% crop_list,]
  }
  
  df = merge(df, df_sa_units_cw[,list(county_id,spatial_id)], by='county_id')
  df = df[,commodity:='crops'][, list(spatial_id,production_t,commodity)][, lapply(.SD, sum, na.rm=TRUE), by=list(spatial_id,commodity)]
  df = dcast(df, spatial_id ~ commodity, value.var = "production_t")
  
  if(country=='argentina'){df_w_a = df}else{df_w_b = df}
  
}

df = rbind(df_w_a,df_w_b)
df = merge(df_sa_units[spatial_id %in% spatial_id_list_final, list(spatial_id)], df, by=c('spatial_id'),all.x=T)
spatial_id_list_final = unique(df$spatial_id)
df[is.na(crops),]$crops = round(quantile(df$crops,0.01,na.rm=T),0)
df_w_crops = df

df = merge(df_w_beef[,list(spatial_id,total_cattlestock)], df_w_crops, by='spatial_id')
setnames(df, old = c('total_cattlestock','crops'),  new = c('weight_beef','weight_crops'))
df = merge(df, df_sa_units[,c('spatial_id','region','tC_per_ha')], by='spatial_id')
df = merge(df, df_frontier, by='spatial_id')

setorder(df, tC_per_ha)
setorder(df, frontier)
df$spatial_id_name = df$spatial_id 
df_w = df[spatial_id %in% spatial_id_list_final,list(spatial_id_name,weight_beef,weight_crops,region,frontier,tC_per_ha)]
spatial_id_list_final = unique(df_w$spatial_id)


################################ Spatial untis

df_list = merge(df_sa_units, df_frontier, by='spatial_id')
setorder(df_list, tC_per_ha)
setorder(df_list, frontier)
df_list$spatial_id_name = df_list$spatial_id
df_su = df_list[spatial_id %in% spatial_id_list_final,][,list(spatial_id,spatial_id_name,region,frontier)]


################################ Export to workbook

list_of_datasets <- list("supply"=df_supply, "tc"=df_tc, "indicator_in"=df_in, 
                         "a_ij_c"=df_aijc, "a_nj_c"=df_anjc, "a_j_c"=df_ajc,"demand_j"=df_demand,
                         "N"=df_n, "MP"=df_mp,
                         "emissions_luc"=df_e, "emissions_nluc"=df_en, "emissions_total"=df_et, "emissions_total_region"=df_et_r, "weights"=df_w,
                         "spatial_units"=df_su)
write_xlsx(list_of_datasets, path = "../3_simulations/aux_params/parameters.xlsx")

